* Session setup: close any log left open by a previous run, wipe memory,
* and switch off the -more- pause so the do-file runs without interruption.
capture log close
clear all
set more off

* Root folders used by every section below:
*   $rawdata  - input extracts that are only read
*   $workdata - intermediate and final datasets written by this do-file
global rawdata  "D:\Data\Workdata\706991\Hours2023\rawdata"
global workdata "D:\Data\Workdata\706991\Hours2023\workdata"

********************************************************************************
*********************** PRODUCING REGRESSION DATA ******************************
********************************************************************************

********************************************************************************
* 			INFORMATION ABOUT TENURE AND WORKPLACE ID FROM IDAN				   *
********************************************************************************
* Reads the IDAN extract, keeps primary jobs with a valid workplace id and a
* tenure between 0 and 50 years, and adds:
*   tenure        - aar minus ansaar
*   firm_size     - number of retained rows sharing the same lbnr and aar
*   new_workplace - 1 when next year's lbnr is observed and differs
* Result is saved as data_idan_full, sorted by pnr aar.
use "$rawdata\idan.dta", clear
destring *, replace
rename job_cvrnr cvrnr

* Restrict to the primary job (type "H") at a workplace with a non-zero id
keep if job_type == "H"
drop if lbnr == 0
drop job_type

* Tenure in years; anything outside [0, 50] (including missing) is discarded
gen tenure = aar - ansaar
keep if tenure >= 0 & tenure <= 50
drop ansaar

* Head-count per workplace-year, computed on the rows kept above
sort lbnr aar
by lbnr aar: gen firm_size = _N

* Workplace change indicator based on the one-year lead of lbnr
xtset pnr aar
gen long next_lbnr = F1.lbnr
gen new_workplace = (next_lbnr != . & next_lbnr != lbnr)
drop next_lbnr

sort pnr aar
save "$workdata\data_idan_full", replace

********************************************************************************
* 				 INFORMATION ABOUT EXPERIENCE FROM IDAP			     		   *
********************************************************************************
* Reads the IDAP extract and builds exp as
*   round(erhverv_akk/1000 + erhverv_1979/1000),
* treating a missing erhverv_1979 as zero. Rows where the sum is missing are
* dropped. Result is saved as data_idap_full, sorted by pnr aar.
use "$rawdata\idap", clear
destring *, replace

* Missing 1979 component counts as zero; both components are scaled by 1/1000
replace erhverv_1979 = cond(erhverv_1979 == ., 0, erhverv_1979)/1000
gen exp = erhverv_akk/1000 + erhverv_1979

* Keep only rows with a computed value, then round to whole units
keep if exp < .
replace exp = round(exp)
drop erhverv*

sort pnr aar
save "$workdata\data_idap_full", replace

********************************************************************************
* 			       INFORMATION ABOUT EDUCATION FROM UDDA			     	   *
********************************************************************************
* Reads the UDDA extract and collapses the education code hfaudd into a
* six-level variable schooling (0-5). Result is saved as data_udda_full,
* sorted by pnr aar.
use "$rawdata\udda", clear
destring *, replace

* Code 1 means no recorded education level
drop if hfaudd == 1
* Short codes identify the group without further detail; scale them up so
* they fall in the same ranges as the full codes
replace hfaudd = 10*hfaudd if hfaudd < 1000

* Default 0 = other / missing: covers codes below 1000, codes of 9000 and
* above, and missing codes
gen schooling = 0
* 1 = high school or less
replace schooling = 1 if hfaudd >= 1000 & hfaudd < 2000
* 2 = IVU 1-6 (2000-2999) and vocational education (3000-3999)
replace schooling = 2 if hfaudd >= 2000 & hfaudd < 4000
* 3 = KVU (short-cycle higher education)
replace schooling = 3 if hfaudd >= 4000 & hfaudd < 5000
* 4 = MVU (medium-cycle higher education), plus the 8500-8999 range that the
*     original labels "Master"
* NOTE(review): 8500-8999 is mapped to 4 although the neighbouring 6000-8499
* range is mapped to 5 - confirm this is intended
replace schooling = 4 if (hfaudd >= 5000 & hfaudd < 6000) | (hfaudd >= 8500 & hfaudd < 9000)
* 5 = LVU (long-cycle higher education)
replace schooling = 5 if hfaudd >= 6000 & hfaudd < 8500

drop hfaudd
sort pnr aar
save "$workdata\data_udda_full", replace

********************************************************************************
* 		 INFORMATION ABOUT INDIVIDUAL CHARACTERISTICS FROM PERSONER	     	   *
********************************************************************************
* Reads the PERSONER extract and derives two indicators:
*   partner - 1 when civst is "G" or "P"
*   woman   - 1 when koen equals 2
* It also gives the family id and age variables English names. Result is saved
* as data_personer_full, sorted by pnr aar.
use "$rawdata\personer", clear

* civst is compared as text, so this must happen before destring
gen partner = inlist(civst, "G", "P")
drop civst

destring *, replace
gen woman = koen == 2
drop koen
rename (familie_nr alder) (family_id age)

sort pnr aar
save "$workdata\data_personer_full", replace

********************************************************************************
* 		     INFORMATION ABOUT NUMBER OF CHILDREN FROM FAMILIE	         	   *
********************************************************************************
* Reads the FAMILIE extract and gives the child-related variables and the
* family key English names. Result is saved as data_familie_full, sorted by
* family_id aar (the key used when it is merged in later).
use "$rawdata\familie", clear
destring *, replace

rename (c_antboernf c_aldaeldst c_aldyngst familie_id) ///
       (num_children age_oldest age_youngest family_id)

sort family_id aar
save "$workdata\data_familie_full", replace

********************************************************************************
* 		          INFORMATION ABOUT WORKPLACE FROM IDAS	                	   *
********************************************************************************
* Reads the IDAS extract and assigns every workplace-year (lbnr, aar) an
* industry group. The group is first derived from the older branch code
* (branche) and then overwritten wherever the db07 code (branche07) falls in
* one of the ranges below. Result is saved as data_idas_full with the
* variables lbnr, aar and industry, sorted by lbnr aar.
use "$rawdata\idas", clear                                    
destring *, replace

* Older branch code: db7793, replaced by db03 for years after 2003
gen branche = arb_branche_db7793
replace branche = arb_branche_db03 if aar > 2003
rename  arb_branche_db07 branche07
* NOTE(review): 2008 db07 codes are scaled by 10, presumably because they are
* stored with one digit fewer that year - confirm against the raw data
replace branche07=branche07*10 if aar==2008

* --- Industry from the older branch code ------------------------------------
gen industry = .	
replace industry = 1 	if (branche < 150000) 						/*Primary*/
replace industry = 2  	if (branche >= 150000 & branche < 400000) 	/*Manufacturing*/
replace industry = 3  	if (branche >= 400000 & branche < 450000) 	/*Electricity, gas, water and heat supply; sewerage and waste*/
replace industry = 4  	if (branche >= 450000 & branche < 500000) 	/*Construction*/
replace industry = 5  	if (branche >= 500000 & branche < 550000) 	/*Trade and repair*/
replace industry = 20  	if (branche >= 550000 & branche < 600000) 	/*Hotels and restaurants; organisations, entertainment and sport*/
replace industry = 7  	if (branche >= 600000 & branche < 650000) 	/*Transport and freight handling*/
replace industry = 8 	if (branche >= 650000 & branche < 700000) 	/*Banking, insurance and finance*/
replace industry = 9 	if (branche >= 700000 & branche < 750000) 	/*Real estate, renting and business services*/
replace industry = 10 	if (branche >= 750000 & branche < 800000)	/*Public administration, defence and social security*/
replace industry = 11 	if (branche >= 800000 & branche < 850000) 	/*Education*/
replace industry = 12 	if (branche >= 850000 & branche < 900000) 	/*Health and social work*/
replace industry = 3 	if (branche >= 900000 & branche < 910000) 	/*Electricity, gas, water and heat supply; sewerage and waste*/
replace industry = 20 	if (branche >= 910000 & branche <= 990000) 	/*Other*/
* Fix: the cut-off was 9900000 (seven digits), so codes just above 990000,
* e.g. 999999, received no industry at all. A missing branche also satisfies
* this condition because Stata orders missing above every number; that was
* already the case with the old cut-off and is kept unchanged.
replace industry = 200 	if (branche > 990000) 						/*Unreported, but non-missing*/

* --- Industry from the db07 code (overrides the above where it applies) ------
* Fix: a stray "½a" token between the value and -if- made the next line a
* syntax error that stopped the do-file.
replace industry = 1 	if (branche07 < 100000) 						/*Primary*/
replace industry = 2  	if (branche07 >= 100000 & branche07 < 350000) 	/*Manufacturing*/
replace industry = 3  	if (branche07 >= 350000 & branche07 < 410000) 	/*Electricity, gas, water and heat supply; sewerage and waste*/
replace industry = 4 	if (branche07 >= 410000 & branche07 < 450000) 	/*Construction*/
replace industry = 5  	if (branche07 >= 450000 & branche07 < 490000) 	/*Trade and repair*/
replace industry = 7  	if (branche07 >= 490000 & branche07 < 550000) 	/*Transport and freight handling*/
replace industry = 20  	if (branche07 >= 550000 & branche07 < 580000) 	/*Hotels and restaurants; organisations, entertainment and sport*/
replace industry = 20 	if (branche07 >= 580000 & branche07 < 640000)   /*Other*/
replace industry = 8 	if (branche07 >= 640000 & branche07 < 680000) 	/*Banking, insurance and finance*/
replace industry = 9 	if (branche07 >= 680000 & branche07 < 690000) 	/*Real estate, renting and business services*/
replace industry = 20 	if (branche07 >= 690000 & branche07 < 840000)	/*Other*/
replace industry = 10 	if (branche07 >= 840000 & branche07 < 850000)	/*Public administration, defence and social security*/
replace industry = 11 	if (branche07 >= 850000 & branche07 < 860000) 	/*Education*/
replace industry = 12 	if (branche07 >= 860000 & branche07 < 900000) 	/*Health and social work*/
replace industry = 20 	if (branche07 >= 900000 & branche07 <= 990000) 	/*Other*/
replace industry = 200 	if (branche07 == 999999) 						/*Unreported, but non-missing*/

keep lbnr aar industry
sort lbnr aar
save "$workdata\data_idas_full", replace 

********************************************************************************
* 			    INFORMATION ABOUT JOBLEVEL FROM RAS   			     	       *
********************************************************************************
* Reads the RAS extract and derives:
*   pos     - job level 1-4 from job_socio_kode (missing for other codes)
*   leadpos - next year's pos for the same person
*   private - 1 for the listed private-sector codes, 0 otherwise
* Rows with job_funktion_kode 99 are dropped. Result is saved as
* data_RAS_full, sorted by pnr aar.
use "$rawdata\ras", clear
destring *, replace

* Job level:
*   131 -> 1 top manager        132 -> 2 function manager
*   133 -> 3 white collar       134/135 -> 4 blue/gray collar
gen pos = cond(job_socio_kode == 131, 1,       ///
          cond(job_socio_kode == 132, 2,       ///
          cond(job_socio_kode == 133, 3,       ///
          cond(inlist(job_socio_kode, 134, 135), 4, .))))

* One-year lead of the job level within person
xtset pnr aar
gen leadpos = F1.pos

* Private-sector indicator.
* job_funktion_kode is the source for 1996-2007, job_sektor_kode for 2008-2016;
* a row is private when either variable carries one of the codes listed.
gen private = inlist(job_funktion_kode, 32, 42, 52, 80, 90) |              ///
              inlist(job_sektor_kode, 18, 28, 32, 38, 42, 48, 52, 58, 62, 81)

drop if job_funktion_kode == 99

drop job*

sort pnr aar
save "$workdata\data_RAS_full", replace

********************************************************************************
* 				 INFORMATION ABOUT EXPORT AND SALES FROM FIRM                  *
********************************************************************************
* Reads the FIRM extract, keeps firm id, year, sales and total exports from
* 1994 onwards, and adds export_pos, an indicator for strictly positive
* exports. Result is saved as data_firm_full, sorted by cvrnr aar.
use "$rawdata\firm", clear
keep cvrnr oms_firm eksport_ialt aar
destring *, replace

keep if aar >= 1994
* Fix: Stata orders missing values above every number, so a bare
* (eksport_ialt > 0) flagged firms with missing exports as exporters.
* The indicator is now left missing when exports are missing.
gen export_pos = (eksport_ialt > 0) if !missing(eksport_ialt)
rename eksport_ialt export
rename oms_firm sales

sort cvrnr aar
save "$workdata\data_firm_full", replace

********************************************************************************
********************** MERGE ALL DATASETS **************************************
********************************************************************************
* Merging AKU with IDA.
* Starts from the hours data (1996 onwards, strictly positive and non-missing
* actual and usual hours) and attaches the datasets built above. Every merge
* except the last keeps matched rows only; the firm merge also keeps rows
* without firm information. Result is saved as ida_full1, sorted by pnr aar.
use "$workdata\hoursdata_1_X", clear 
keep if aar >= 1996 & h_actual > 0 & h_actual != . & h_usual > 0 & h_usual != .

* Person-year level sources, one row per pnr aar on both sides
foreach src in personer udda idap RAS idan {
	merge 1:1 pnr aar using "$workdata\data_`src'_full", keep(match) nogenerate
}

* Family-year level: several persons can share one family row
merge m:1 family_id aar using "$workdata\data_familie_full", keep(match) nogenerate

* Workplace-year level: industry
merge m:1 lbnr aar using "$workdata\data_idas_full", keep(match) nogenerate

* Firm-year level: sales and exports; unmatched persons are retained
merge m:1 cvrnr aar using "$workdata\data_firm_full", keep(master match) nogenerate

sort pnr aar
save "$workdata\ida_full1", replace 

********************************************************************************
* 		         CLEAN DATA AND CREATING ADDITIONAL VARIABLES	          	   *
********************************************************************************
* Loads the merged file ida_full1, derives the regression variables, and
* writes two datasets:
*   regression_data_full  - all rows with observed, non-zero hours
*   regression_data_full1 - the subset with non-missing sales and export
use "$workdata\ida_full1", clear
rename aar year

* Squared terms
foreach v in age tenure exp num_children {
	gen `v'_sq = `v'^2
}

* Promotion categories: prom1 marks a move from position 2 this year to
* position 1 next year.
* NOTE(review): prom2 and prom3 are created but never set to 1 in this
* section - confirm whether further definitions are missing.
gen prom1 = (pos == 2 & leadpos == 1)
gen prom2 = 0
gen prom3 = 0

* Work flexibility: any of the four schedule variables equal to 1 (frequent)
* or equal to 2 (sometimes)
gen d_frequent  = inlist(1, evening, night, sat, sun)
gen d_sometimes = inlist(2, evening, night, sat, sun)

* Education dummies from the schooling levels
gen hs_less = inlist(schooling, 1, 2)
gen kvu = schooling == 3
gen mvu = schooling == 4
gen lvu = schooling == 5

* Occupation code: values below 100 are scaled by 10 first, then the last
* digit is removed for disco2 and one more digit for disco1
gen disco2 = disco
replace disco2 = disco2*10 if disco2 < 100
replace disco2 = trunc(disco2/10)
gen disco1 = trunc(disco2/10)

* Age bands (0-2, 3-4, 5 and above) for the oldest and the youngest child
foreach who in oldest youngest {
	gen `who'_child02 = (age_`who' <= 2)
	gen `who'_child34 = inrange(age_`who', 3, 4)
	gen `who'_child5  = (age_`who' >= 5 & age_`who' != .)
}

* Interval dummies for h_actual (ha*) and h_usual (hu*):
* below 37, exactly 37, 38-47, and 48 or more
foreach hrs in actual usual {
	local k = substr("`hrs'", 1, 1)
	gen h`k'0_36  = (h_`hrs' < 37)
	gen h`k'37    = (h_`hrs' == 37)
	gen h`k'38_47 = inrange(h_`hrs', 38, 47)
	gen h`k'gt48  = (h_`hrs' >= 48)
}

* Keep only people with observed hours
drop if inlist(h_usual, ., 0) | inlist(h_actual, ., 0)

* Hours rescaled by 1/100
gen h2a = h_actual/100
gen h2u = h_usual/100

* Save final data set
save "$workdata\regression_data_full", replace

* Second version restricted to rows with firm sales and export information
drop if sales == .
drop if export == .
save "$workdata\regression_data_full1", replace
